2  In class

Data Analyses with R

Author

F. Mengü Aydın

Published

October 19, 2022

3 Planes Dataset Analysis with dplyr

3.1 Loading libraries

library(nycflights13)
library(tidyverse)
── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
✔ ggplot2 3.3.6      ✔ purrr   0.3.5 
✔ tibble  3.1.8      ✔ dplyr   1.0.10
✔ tidyr   1.2.1      ✔ stringr 1.4.1 
✔ readr   2.1.3      ✔ forcats 0.5.2 
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
library(dplyr)

3.2 Summary of planes table

# Per-column summary of the planes table (NA counts appear where present).
summary(planes)
   tailnum               year          type           manufacturer      
 Length:3322        Min.   :1956   Length:3322        Length:3322       
 Class :character   1st Qu.:1997   Class :character   Class :character  
 Mode  :character   Median :2001   Mode  :character   Mode  :character  
                    Mean   :2000                                        
                    3rd Qu.:2005                                        
                    Max.   :2013                                        
                    NA's   :70                                          
    model              engines          seats           speed      
 Length:3322        Min.   :1.000   Min.   :  2.0   Min.   : 90.0  
 Class :character   1st Qu.:2.000   1st Qu.:140.0   1st Qu.:107.5  
 Mode  :character   Median :2.000   Median :149.0   Median :162.0  
                    Mean   :1.995   Mean   :154.3   Mean   :236.8  
                    3rd Qu.:2.000   3rd Qu.:182.0   3rd Qu.:432.0  
                    Max.   :4.000   Max.   :450.0   Max.   :432.0  
                                                    NA's   :3299   
    engine         
 Length:3322       
 Class :character  
 Mode  :character  
                   
                   
                   
                   
# Preview the first rows of planes.
head(planes)
# A tibble: 6 × 9
  tailnum  year type                    manuf…¹ model engines seats speed engine
  <chr>   <int> <chr>                   <chr>   <chr>   <int> <int> <int> <chr> 
1 N10156   2004 Fixed wing multi engine EMBRAER EMB-…       2    55    NA Turbo…
2 N102UW   1998 Fixed wing multi engine AIRBUS… A320…       2   182    NA Turbo…
3 N103US   1999 Fixed wing multi engine AIRBUS… A320…       2   182    NA Turbo…
4 N104UW   1999 Fixed wing multi engine AIRBUS… A320…       2   182    NA Turbo…
5 N10575   2002 Fixed wing multi engine EMBRAER EMB-…       2    55    NA Turbo…
6 N105UW   1999 Fixed wing multi engine AIRBUS… A320…       2   182    NA Turbo…
# … with abbreviated variable name ¹​manufacturer
# Preview the last rows of planes.
tail(planes)
# A tibble: 6 × 9
  tailnum  year type                    manuf…¹ model engines seats speed engine
  <chr>   <int> <chr>                   <chr>   <chr>   <int> <int> <int> <chr> 
1 N996DL   1991 Fixed wing multi engine MCDONN… MD-88       2   142    NA Turbo…
2 N997AT   2002 Fixed wing multi engine BOEING  717-…       2   100    NA Turbo…
3 N997DL   1992 Fixed wing multi engine MCDONN… MD-88       2   142    NA Turbo…
4 N998AT   2002 Fixed wing multi engine BOEING  717-…       2   100    NA Turbo…
5 N998DL   1992 Fixed wing multi engine MCDONN… MD-88       2   142    NA Turbo…
6 N999DN   1992 Fixed wing multi engine MCDONN… MD-88       2   142    NA Turbo…
# … with abbreviated variable name ¹​manufacturer

4 Number of planes by year

# Count planes per value of `year`; rows with a missing year form an NA group.
planes_by_year <- group_by(planes, year)
# n() is dplyr's group-size helper, so the count does not depend on a column.
g <- summarise(planes_by_year, count = n())
# n = Inf prints every row without hard-coding the current number of groups.
print(g, n = Inf)
# A tibble: 47 × 2
    year count
   <int> <int>
 1  1956     1
 2  1959     2
 3  1963     2
 4  1965     1
 5  1967     1
 6  1968     1
 7  1972     1
 8  1973     1
 9  1974     1
10  1975     3
11  1976     3
12  1977     2
13  1978     2
14  1979     4
15  1980     4
16  1983     1
17  1984     5
18  1985    23
19  1986    17
20  1987    40
21  1988    75
22  1989    60
23  1990    90
24  1991   108
25  1992   109
26  1993    59
27  1994    48
28  1995    54
29  1996    55
30  1997    74
31  1998   174
32  1999   206
33  2000   244
34  2001   284
35  2002   212
36  2003   150
37  2004   192
38  2005   162
39  2006   126
40  2007   123
41  2008   147
42  2009    84
43  2010    48
44  2011    66
45  2012    95
46  2013    92
47    NA    70
# Distinct manufacturer names that appear in planes.
m <- distinct(planes, manufacturer)
# Count planes per manufacturer.
planes_by_manufacturer <- group_by(planes, manufacturer)
# n() is dplyr's group-size helper, so the count does not depend on a column.
pbm <- summarise(planes_by_manufacturer, count = n())
# n = Inf prints every row without hard-coding the current number of groups.
print(pbm, n = Inf)
# A tibble: 35 × 2
   manufacturer                  count
   <chr>                         <int>
 1 AGUSTA SPA                        1
 2 AIRBUS                          336
 3 AIRBUS INDUSTRIE                400
 4 AMERICAN AIRCRAFT INC             2
 5 AVIAT AIRCRAFT INC                1
 6 AVIONS MARCEL DASSAULT            1
 7 BARKER JACK L                     1
 8 BEECH                             2
 9 BELL                              2
10 BOEING                         1630
11 BOMBARDIER INC                  368
12 CANADAIR                          9
13 CANADAIR LTD                      1
14 CESSNA                            9
15 CIRRUS DESIGN CORP                1
16 DEHAVILLAND                       1
17 DOUGLAS                           1
18 EMBRAER                         299
19 FRIEDEMANN JON                    1
20 GULFSTREAM AEROSPACE              2
21 HURLEY JAMES LARRY                1
22 JOHN G HESS                       1
23 KILDALL GARY                      1
24 LAMBERT RICHARD                   1
25 LEARJET INC                       1
26 LEBLANC GLENN T                   1
27 MARZ BARRY                        1
28 MCDONNELL DOUGLAS               120
29 MCDONNELL DOUGLAS AIRCRAFT CO   103
30 MCDONNELL DOUGLAS CORPORATION    14
31 PAIR MIKE E                       1
32 PIPER                             5
33 ROBINSON HELICOPTER CO            1
34 SIKORSKY                          1
35 STEWART MACO                      2
# Latest `year` among planes whose manufacturer is exactly "BOEING".
md <- planes %>% filter(manufacturer == "BOEING")
# `year` has missing values, so a plain max() returns NA (the result this
# chunk printed before the fix); na.rm = TRUE ignores the missing entries.
max(md$year, na.rm = TRUE)
[1] NA